Exploración de datos ambientales y de crecimiento en un centro de cultivo de la XI Región, sector Huaitecas.

# Load the data set from the Excel workbook. The call is namespace-qualified
# so it does not depend on readxl having been attached by a library() call
# that is not visible in this file.
rt <- readxl::read_excel("/cloud/project/RT.xlsx")

# Per-column summary (min, quartiles, median, mean, max).
summary(rt)
##       sem           sgr               o2              temp      
##  Min.   :  1   Min.   :0.1600   Min.   : 5.398   Min.   : 9.60  
##  1st Qu.:114   1st Qu.:0.5400   1st Qu.: 7.548   1st Qu.:10.51  
##  Median :227   Median :0.7700   Median : 8.320   Median :11.47  
##  Mean   :227   Mean   :0.8765   Mean   : 8.030   Mean   :11.48  
##  3rd Qu.:340   3rd Qu.:1.1600   3rd Qu.: 8.653   3rd Qu.:12.36  
##  Max.   :453   Max.   :2.5000   Max.   :10.974   Max.   :13.33

Graficas incluidas

Para la data revisada se busca la mejor correlación entre las variables.

# Pairwise panels (Pearson method) for columns 2 to 4 of `rt`.
pairs.panels(
  rt[, 2:4],
  method = "pearson",
  hist.col = "magenta",
  density = FALSE,
  font = 2
)

# Correlation plot computed over every column of `rt`.
corPlot(
  rt,
  cex = 1.2,
  main = "Matriz de correlación"
)

## Al obtener una correlación más baja mediante Pearson, intentaremos mediante Spearman ##

# Spearman rank correlation between the sgr and o2 columns.
correlation <- cor(rt$sgr, rt$o2, method = "spearman")
# Show the coefficient; the assignment alone prints nothing, yet the text
# that follows reports this value.
correlation

Obtuvimos una correlación de Spearman de 0.649 entre sgr y o2.

##buscamos relacion grafica##

# Layers shared by the three exploratory scatter plots below.
scatter_layers <- list(geom_point(), theme_light())

# o2 against sgr.
ggplot(rt, aes(x = sgr, y = o2)) + scatter_layers

# o2 against sem.
ggplot(rt, aes(x = sem, y = o2)) + scatter_layers

# temp against sgr.
ggplot(rt, aes(x = sgr, y = temp)) + scatter_layers

##obtendremos las estimaciones de los parametros estadisticos##

# Simple linear regression of sgr on o2.
mod1 <- lm(formula = sgr ~ o2, data = rt)
# Print the call and the estimated coefficients.
print(mod1)
## 
## Call:
## lm(formula = sgr ~ o2, data = rt)
## 
## Coefficients:
## (Intercept)           o2  
##     -1.2128       0.2602

En la salida anterior se observan los valores estimados de $\beta_0$ y $\beta_1$, pero no aparece la estimación de $\sigma$. Para obtener una tabla de resumen con detalles del modelo ajustado, se usa la función genérica `summary`.

# Full summary table of mod1 (coefficients, residual standard error, R-squared).
print(summary(mod1))
## 
## Call:
## lm(formula = sgr ~ o2, data = rt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.20260 -0.24981 -0.07131  0.16356  1.50738 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.21280    0.16107   -7.53  2.8e-13 ***
## o2           0.26020    0.01993   13.05  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3881 on 451 degrees of freedom
## Multiple R-squared:  0.2743, Adjusted R-squared:  0.2727 
## F-statistic: 170.4 on 1 and 451 DF,  p-value: < 2.2e-16

Para incluir la recta de regresion que representa el modelo ajustado anterior…

# Scatter plot with the fitted line of mod1 (sgr ~ o2).
# The predictor (o2) goes on x and the response (sgr) on y so that
# geom_smooth(formula = y ~ x) fits sgr ~ o2, i.e. the same model as mod1;
# with the axes the other way round the line would be the o2 ~ sgr fit.
ggplot(rt, aes(x = o2, y = sgr)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, col = "dodgerblue1") +
  theme_light()

# Layers shared by both plots: points, a straight-line lm fit without a
# confidence band, and the light theme.
trend_layers <- list(
  geom_point(),
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, col = "dodgerblue1"),
  theme_light()
)

# o2 against sem.
ggplot(rt, aes(x = sem, y = o2)) + trend_layers

# temp against sem.
ggplot(rt, aes(x = sem, y = temp)) + trend_layers

La regresión lineal simple no se ajusta de buena manera a la nube de datos.

Utilizaremos regresión múltiple.

library(scatterplot3d)
# NOTE(review): attach() is discouraged, but it is kept here because later
# calls in this file refer to the columns of rt by bare name.
attach(rt)
# 3D scatter plot of sgr (z) against temp (x) and o2 (y).
# Columns are taken explicitly from rt so this call does not rely on the
# attach() above. The degree sign is written as a Unicode escape so the
# axis label no longer renders as "C?".
scatterplot3d(x = rt$temp, y = rt$o2, z = rt$sgr, pch = 16, cex.lab = 1,
              highlight.3d = TRUE, type = "h", xlab = "Temp (\u00b0C)",
              ylab = "O2 (mg/l)", zlab = "SGR")

A medida que aumenta la temperatura aumenta el SGR, y también lo hace en condiciones de mayor O2.

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive 3D scatter plot of sgr against temp and o2, coloured by sgr.
# Columns are referenced through data = rt with formulas, so the call does
# not depend on attach(rt). The marker mode is stated explicitly instead of
# leaving plotly to pick a default. The degree sign is written as a Unicode
# escape so the axis title no longer renders as "C?".
plot_ly(data = rt, x = ~temp, y = ~o2, z = ~sgr, color = ~sgr,
        type = "scatter3d", mode = "markers") %>%
  layout(scene = list(xaxis = list(title = "Temp (\u00b0C)"),
                      yaxis = list(title = "o2 (mg/l)"),
                      zaxis = list(title = "SGR")))
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# Basándonos en el modelo 3D, la expresión que se ajusta es:

# Multiple linear regression of sgr on o2 and temp.
mod <- lm(formula = sgr ~ o2 + temp, data = rt)
# Summary table for the two-predictor model.
print(summary(mod))
## 
## Call:
## lm(formula = sgr ~ o2 + temp, data = rt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.27840 -0.24930 -0.08086  0.18735  1.48631 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.73268    0.38872  -4.457 1.05e-05 ***
## o2           0.27823    0.02338  11.898  < 2e-16 ***
## temp         0.03266    0.02223   1.469    0.143    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3876 on 450 degrees of freedom
## Multiple R-squared:  0.2777, Adjusted R-squared:  0.2745 
## F-statistic: 86.52 on 2 and 450 DF,  p-value: < 2.2e-16

Para incluir el plano de regresión que representa el modelo ajustado anterior:

Se crea el gráfico 3D y se guarda en un objeto, por ejemplo `mi_3d`.

# 3D scatter plot kept in an object so the regression plane can be added.
# Columns are taken explicitly from rt (no reliance on attach), and the
# degree sign is a Unicode escape so the label no longer renders as "C?".
mi_3d <- scatterplot3d(x = rt$temp, y = rt$o2, z = rt$sgr, pch = 16,
                       cex.lab = 1, highlight.3d = TRUE, type = "h",
                       xlab = "Temp (\u00b0C)", ylab = "O2 (mg/l)",
                       zlab = "SGR")

# plane3d() reads coefficients by position: intercept, x slope, y slope.
# `mod` was fitted as sgr ~ o2 + temp, but the plot has x = temp and y = o2,
# so passing `mod` directly would apply the o2 slope to the temp axis and
# vice versa. Reorder the coefficients by name to match the plot axes.
plane_coefs <- coef(mod)[c("(Intercept)", "temp", "o2")]
mi_3d$plane3d(plane_coefs, lty.box = "solid", col = "mediumblue")

library(ggplot2)

# 95% prediction limits for sgr from the simple model mod1 (sgr ~ o2).
# newdata = rt yields one row per observation, so the fit/lwr/upr columns
# line up with the rows of rt when bound together.
pred_limits <- predict(mod1, newdata = rt, interval = "prediction",
                       level = 0.95)
rt_pred <- cbind(rt, pred_limits)

# Scatter plot of sgr against o2 with:
#   - dashed red lines: lower/upper 95% prediction limits (lwr, upr);
#   - blue line with pink band: lm fit and its 95% confidence band.
# The dashed lines were previously mapped to the raw sgr and o2 columns, which
# drew an identity line and a zig-zag through the data instead of limits.
ggplot(rt_pred, aes(x = o2, y = sgr)) +
  geom_point() +
  geom_line(aes(y = lwr), color = "red", linetype = "dashed") +
  geom_line(aes(y = upr), color = "red", linetype = "dashed") +
  geom_smooth(method = lm, formula = y ~ x, se = TRUE, level = 0.95,
              col = "blue", fill = "pink2") +
  theme_light()